library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.4     ✓ purrr   0.3.4
✓ tibble  3.1.2     ✓ dplyr   1.0.7
✓ tidyr   1.1.3     ✓ stringr 1.4.0
✓ readr   1.4.0     ✓ forcats 0.5.1
── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(SingleCellExperiment)
Loading required package: SummarizedExperiment
Loading required package: MatrixGenerics
Loading required package: matrixStats

Attaching package: ‘matrixStats’

The following object is masked from ‘package:dplyr’:

    count


Attaching package: ‘MatrixGenerics’

The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, colCounts, colCummaxs, colCummins, colCumprods, colCumsums, colDiffs, colIQRDiffs, colIQRs,
    colLogSumExps, colMadDiffs, colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats, colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads, colWeightedMeans, colWeightedMedians, colWeightedSds, colWeightedVars, rowAlls, rowAnyNAs, rowAnys,
    rowAvgsPerColSet, rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods, rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps, rowMadDiffs, rowMads,
    rowMaxs, rowMeans2, rowMedians, rowMins, rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs,
    rowVars, rowWeightedMads, rowWeightedMeans, rowWeightedMedians, rowWeightedSds, rowWeightedVars

Loading required package: GenomicRanges
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:dplyr’:

    combine, intersect, setdiff, union

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect,
    is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min

Loading required package: S4Vectors

Attaching package: ‘S4Vectors’

The following objects are masked from ‘package:dplyr’:

    first, rename

The following object is masked from ‘package:tidyr’:

    expand

The following objects are masked from ‘package:base’:

    expand.grid, I, unname

Loading required package: IRanges

Attaching package: ‘IRanges’

The following objects are masked from ‘package:dplyr’:

    collapse, desc, slice

The following object is masked from ‘package:purrr’:

    reduce

Loading required package: GenomeInfoDb
Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'.


Attaching package: ‘Biobase’

The following object is masked from ‘package:MatrixGenerics’:

    rowMedians

The following objects are masked from ‘package:matrixStats’:

    anyMissing, rowMedians
# Make sure that you use the forked version
# devtools::install_github("const-ae/sctransform@offset_with_flexible_theta")
library(sctransform)

Prepare Data

# Work around for some bug in zellkonverter
# https://github.com/theislab/zellkonverter/issues/45
setAs("dgRMatrix", to = "dgCMatrix", function(from){
  as(as(from, "CsparseMatrix"), "dgCMatrix")
})
if(! file.exists("../data/nih3t3.h5ad")){
  download.file("https://data.caltech.edu/tindfiles/serve/a448e98e-89cd-47b3-a134-803bbde29781/", "../data/nih3t3.h5ad")
}
if(! file.exists("../data/hek293t.h5ad")){
  download.file("https://data.caltech.edu/tindfiles/serve/b2758046-9247-43ab-b8f0-68882b4f39a3/", "../data/hek293t.h5ad")
}

Helper function, so that the content of the sctransform results stay in order

transform_sctrans_object <- function(sctrans_obj, Y){
  fit  <- list(overdispersions = rep(NA, nrow(Y)),
                  PearsonResiduals = array(NA, dim(Y)),
                  Mu <- array(NA, dim(Y)),
                  Beta = matrix(NA, nrow = nrow(Y), ncol = 2),
                  Beta_unreg = matrix(NA, nrow = nrow(Y), ncol = 2),
                  overdispersions_unreg = rep(NA, nrow(Y)),
                  gmean = rep(NA, nrow(Y)))
  sf <- colSums2(Y)
  
  indices <- match(rownames(sctrans_obj$model_pars_fit), rownames(Y))
  indices_unreg <- match(rownames(sctrans_obj$model_pars), rownames(Y))
  fit$overdispersions[indices] <- 1/sctrans_obj$model_pars_fit[,"theta"]
  fit$PearsonResiduals[indices, ] <- sctrans_obj$y
  fit$Beta[indices, ] <- sctrans_obj$model_pars_fit[, c("(Intercept)", "log_umi")]
  fit$Beta_unreg[indices_unreg, ] <- sctrans_obj$model_pars[, c("(Intercept)", "log_umi")]
  fit$Mu <- exp(fit$Beta %*% t(cbind(1, log10(sf))))
  fit$Mu_unreg <- exp(fit$Beta_unreg %*% t(cbind(1, log10(sf))))
  fit$overdispersions_unreg[indices_unreg] <- 1 / sctrans_obj$model_pars[, "theta"]
  fit$gmean[indices] <- sctrans_obj$gene_attr$gmean
  
  fit
}

Load Data

load_data_fnc <- list(
  NIH3T3 = function() zellkonverter::readH5AD("../data/nih3t3.h5ad"),
  HEK239T = function() zellkonverter::readH5AD("../data/hek293t.h5ad"),
  PBMC4k = function() TENxPBMCData::TENxPBMCData("pbmc4k"),
  Mesmer = function() scRNAseq::MessmerESCData(), 
  Zeisel = function() scRNAseq::ZeiselBrainData(),
  Zhen = function() DuoClustering2018::sce_filteredExpr10_Zhengmix4eq()
)

Run sctransform 3 times on each dataset


full_data <- bind_rows(lapply(names(load_data_fnc), function(name){
  print(paste0("Loading ", name))
  
  se <- load_data_fnc[[name]]()
  
  rownames(se) <- paste0("Gene_", 1:nrow(se))
  colnames(se) <- paste0("Cell_", 1:ncol(se))
  # Subsample data to 3000x1000 elements
  se_red <- se[sample(seq_len(nrow(se)), min(3000, nrow(se))), sample(seq_len(ncol(se)), min(1000, ncol(se)))]
  
  Y <- as.matrix(assay(se_red))
  
  set.seed(1)
  sctrans_intermediate <- vst(Y, method = "poisson")
  fit_pois <- transform_sctrans_object(sctrans_intermediate, Y)
  # To make sure that I am actually running the correct sctransform version
  # I check if the indicator value is set
  stopifnot(isTRUE(sctrans_intermediate$is_fork))
  
  set.seed(1)
  sctrans_intermediate2 <- vst(Y, method = "offset", theta_given = NULL)
  fit_off <- transform_sctrans_object(sctrans_intermediate2, Y)
  
  set.seed(1)
  sctrans_intermediate3 <- vst(Y, method = "offset", theta_given = 100)
  fit_off_fixed <- transform_sctrans_object(sctrans_intermediate3, Y)
  
  theta_given <- rep(100, nrow(Y))
  names(theta_given) <- rownames(Y)
  set.seed(1)
  sctrans_intermediate4 <- vst(Y, method = "nb_theta_given", theta_given = theta_given)
  fit_pois_fixed <- transform_sctrans_object(sctrans_intermediate4, Y)

  
  tibble(Dataset = name,
         Poisson = c(fit_pois$PearsonResiduals),
         PoissonFixed = c(fit_pois_fixed$PearsonResiduals),
         Offset = c(fit_off$PearsonResiduals),
         OffsetFixed = c(fit_off_fixed$PearsonResiduals),
         ExpressionLevel = c(fit_off$Mu),
         y = c(Y))
}))
[1] "Loading NIH3T3"
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 852 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 852 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
snapshotDate(): 2021-05-18

  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Found 41 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 852 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 13.36397 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 852 by 1000
Model formula is y ~ log_umi
Found 28 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 852 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 3.772232 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 852 by 1000
Model formula is y ~ log_umi
Second step: Get residuals using fitted parameters for 852 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.6714771 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 852 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 852 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Second step: Get residuals using fitted parameters for 852 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 4.452082 secs
[1] "Loading HEK239T"
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 927 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 927 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Found 38 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 927 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 4.839784 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 927 by 1000
Model formula is y ~ log_umi
Found 15 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 927 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 3.911496 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 927 by 1000
Model formula is y ~ log_umi
Second step: Get residuals using fitted parameters for 927 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.665138 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 927 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 927 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Found 1 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 927 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 4.321753 secs
[1] "Loading PBMC4k"
snapshotDate(): 2021-05-18
see ?TENxPBMCData and browseVignettes('TENxPBMCData') for documentation
loading from cache
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1127 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1127 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===========================================================                                                                                                                     |  33%
  |                                                                                                                                                                                      
  |=====================================================================================================================                                                           |  67%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Found 80 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 1127 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===========================================================                                                                                                                     |  33%
  |                                                                                                                                                                                      
  |=====================================================================================================================                                                           |  67%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 5.376468 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1127 by 1000
Model formula is y ~ log_umi
Found 49 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 1127 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===========================================================                                                                                                                     |  33%
  |                                                                                                                                                                                      
  |=====================================================================================================================                                                           |  67%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 5.140258 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1127 by 1000
Model formula is y ~ log_umi
Second step: Get residuals using fitted parameters for 1127 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===========================================================                                                                                                                     |  33%
  |                                                                                                                                                                                      
  |=====================================================================================================================                                                           |  67%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.7097969 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1127 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1127 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===========================================================                                                                                                                     |  33%
  |                                                                                                                                                                                      
  |=====================================================================================================================                                                           |  67%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Second step: Get residuals using fitted parameters for 1127 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===========================================================                                                                                                                     |  33%
  |                                                                                                                                                                                      
  |=====================================================================================================================                                                           |  67%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 5.405725 secs
[1] "Loading Mesmer"
snapshotDate(): 2021-05-18
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
snapshotDate(): 2021-05-18
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
snapshotDate(): 2021-05-18
loading from cache
require("ensembldb")
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1652 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1652 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
There are 1 estimated thetas smaller than 1e-07 - will be set to 1e-07
Found 27 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 1652 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 5.554952 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1652 by 1000
Model formula is y ~ log_umi
Found 2 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 1652 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 4.278342 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1652 by 1000
Model formula is y ~ log_umi
Second step: Get residuals using fitted parameters for 1652 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.7898989 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1652 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1652 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Second step: Get residuals using fitted parameters for 1652 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 8.076696 secs
[1] "Loading Zeisel"
snapshotDate(): 2021-05-18
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
snapshotDate(): 2021-05-18
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 2391 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 2000 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
There are 1 estimated thetas smaller than 1e-07 - will be set to 1e-07
Found 4 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 2391 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===================================                                                                                                                                             |  20%
  |                                                                                                                                                                                      
  |======================================================================                                                                                                          |  40%
  |                                                                                                                                                                                      
  |==========================================================================================================                                                                      |  60%
  |                                                                                                                                                                                      
  |=============================================================================================================================================                                   |  80%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.448519 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 2391 by 1000
Model formula is y ~ log_umi
Found 3 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 2391 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===================================                                                                                                                                             |  20%
  |                                                                                                                                                                                      
  |======================================================================                                                                                                          |  40%
  |                                                                                                                                                                                      
  |==========================================================================================================                                                                      |  60%
  |                                                                                                                                                                                      
  |=============================================================================================================================================                                   |  80%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.491939 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 2391 by 1000
Model formula is y ~ log_umi
Found 2 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 2391 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===================================                                                                                                                                             |  20%
  |                                                                                                                                                                                      
  |======================================================================                                                                                                          |  40%
  |                                                                                                                                                                                      
  |==========================================================================================================                                                                      |  60%
  |                                                                                                                                                                                      
  |=============================================================================================================================================                                   |  80%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.9169271 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 2391 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 2000 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Found 1 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 2391 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |===================================                                                                                                                                             |  20%
  |                                                                                                                                                                                      
  |======================================================================                                                                                                          |  40%
  |                                                                                                                                                                                      
  |==========================================================================================================                                                                      |  60%
  |                                                                                                                                                                                      
  |=============================================================================================================================================                                   |  80%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 9.207349 secs
[1] "Loading Zhen"
see ?DuoClustering2018 and browseVignettes('DuoClustering2018') for documentation
loading from cache
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1556 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1556 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Found 15 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 1556 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.262152 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1556 by 1000
Model formula is y ~ log_umi
Found 18 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 1556 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.017199 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1556 by 1000
Model formula is y ~ log_umi
Found 6 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 1556 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.7863991 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1556 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1556 genes, 1000 cells

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Found 2 outliers - those will be ignored in fitting/regularization step

Second step: Get residuals using fitted parameters for 1556 genes

  |                                                                                                                                                                                      
  |                                                                                                                                                                                |   0%
  |                                                                                                                                                                                      
  |============================================                                                                                                                                    |  25%
  |                                                                                                                                                                                      
  |========================================================================================                                                                                        |  50%
  |                                                                                                                                                                                      
  |====================================================================================================================================                                            |  75%
  |                                                                                                                                                                                      
  |================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.148958 secs
p1 <- full_data %>%
  drop_na() %>%
  # sample_n(5000) %>%
  ggplot(aes(x= Poisson, y = Offset - Poisson)) +
    geom_hline(yintercept = 0, color = "#E9E9E9") +
    scattermore::geom_scattermore(aes(color = y), pointsize = 2.5, pixels = c(512, 512), alpha = 0.1) +
    scale_color_viridis_c(direction = 1, option = "magma", begin = 0.2, end = 0.8, 
                          trans = "log1p", breaks = c(0, 10, 100, 1000),
                          labels = c(0, 10, 100, ">1000"), limits = c(0, 1001), 
                          oob = scales::oob_squish_any, 
                          minor_breaks = c(0:9, seq(10, 90, by = 10), seq(100, 1000, by = 100))) +
    scale_y_continuous(limits = c(-30, 30)) +
    facet_wrap(~ Dataset, nrow = 1) +
    cowplot::theme_cowplot() +
    labs(title = expression(Effect~of~fixing~beta[s]==1),
         color = "Y", 
         x = "Residuals from the default sctransform model",
         y = expression(Delta[1]))


p1

p2 <- full_data %>%
  drop_na() %>%
  # sample_n(5000) %>%
  ggplot(aes(x= Poisson, y = PoissonFixed - Poisson)) +
    geom_hline(yintercept = 0, color = "#E9E9E9") +
    scattermore::geom_scattermore(aes(color = y), pointsize = 2.5, pixels = c(512, 512), alpha = 0.1) +
    scale_color_viridis_c(direction = 1, option = "magma", begin = 0.2, end = 0.8, 
                          trans = "log1p", breaks = c(0, 10, 100, 1000),
                          labels = c(0, 10, 100, ">1000"), limits = c(0, 1001), 
                          oob = scales::oob_squish_any, 
                          minor_breaks = c(0:9, seq(10, 90, by = 10), seq(100, 1000, by = 100))) +
    scale_y_continuous(limits = c(-30, 30)) +
    facet_wrap(~ Dataset, nrow = 1) +
    cowplot::theme_cowplot() +
    labs(title = expression(Effect~of~fixing~alpha==0.01),
         color = "Y", 
         x = "Residuals from the default sctransform model",
         y = expression(Delta[2]))

p2

p3 <- full_data %>%
  drop_na() %>%
  # sample_n(5000) %>%
  ggplot(aes(x= Poisson, y = OffsetFixed - Poisson)) +
    geom_hline(yintercept = 0, color = "#E9E9E9") +
    scattermore::geom_scattermore(aes(color = y), pointsize = 2.5, pixels = c(512, 512), alpha = 0.1) +
    scale_color_viridis_c(direction = 1, option = "magma", begin = 0.2, end = 0.8, 
                          trans = "log1p", breaks = c(0, 10, 100, 1000),
                          labels = c(0, 10, 100, ">1000"), limits = c(0, 1001), 
                          oob = scales::oob_squish_any, 
                          minor_breaks = c(0:9, seq(10, 90, by = 10), seq(100, 1000, by = 100))) +
    scale_y_continuous(limits = c(-30, 30)) +
    facet_wrap(~ Dataset, nrow = 1) +
    cowplot::theme_cowplot() +
    labs(title = expression(Effect~of~fixing~beta[s]==1~and~alpha==0.01),
         color = "Y", 
         x = "Residuals from the default sctransform model",
         y = expression(Delta[3]))

p3

library(patchwork)
res <- (p1 / p2 / p3) +
  plot_annotation(tag_levels = "A") + plot_layout(guides = "collect")

res

cowplot::save_plot("../output/sctransform_beta_s_and_alpha_importance.pdf", res, 
                   ncol = 7, nrow = 3, base_asp = 0.6, base_height = 3, device = cairo_pdf)

Session Info

sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Mojave 10.14.6

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] patchwork_1.1.1             DuoClustering2018_1.10.0    ensembldb_2.16.0            AnnotationFilter_1.16.0     GenomicFeatures_1.44.0      AnnotationDbi_1.54.1       
 [7] scRNAseq_2.6.1              TENxPBMCData_1.10.0         HDF5Array_1.20.0            rhdf5_2.36.0                DelayedArray_0.18.0         Matrix_1.3-4               
[13] sctransform_0.3.2.9002      SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0 Biobase_2.52.0              GenomicRanges_1.44.0        GenomeInfoDb_1.28.0        
[19] IRanges_2.26.0              S4Vectors_0.30.0            BiocGenerics_0.38.0         MatrixGenerics_1.4.0        matrixStats_0.59.0          forcats_0.5.1              
[25] stringr_1.4.0               dplyr_1.0.7                 purrr_0.3.4                 readr_1.4.0                 tidyr_1.1.3                 tibble_3.1.2               
[31] ggplot2_3.3.4               tidyverse_1.3.1            

loaded via a namespace (and not attached):
  [1] readxl_1.3.1                  backports_1.2.1               AnnotationHub_3.0.0           BiocFileCache_2.0.0           plyr_1.8.6                    lazyeval_0.2.2               
  [7] BiocParallel_1.26.0           listenv_0.8.0                 scattermore_0.7               digest_0.6.27                 htmltools_0.5.1.1             viridis_0.6.1                
 [13] fansi_0.5.0                   magrittr_2.0.1                memoise_2.0.0                 globals_0.14.0                Biostrings_2.60.1             modelr_0.1.8                 
 [19] prettyunits_1.1.1             colorspace_2.0-1              blob_1.2.1                    rvest_1.0.0                   rappdirs_0.3.3                haven_2.4.1                  
 [25] xfun_0.24                     crayon_1.4.1                  RCurl_1.98-1.3                jsonlite_1.7.2                glue_1.4.2                    gtable_0.3.0                 
 [31] zlibbioc_1.38.0               XVector_0.32.0                Rhdf5lib_1.14.1               future.apply_1.7.0            scales_1.1.1                  DBI_1.1.1                    
 [37] ggthemes_4.2.4                Rcpp_1.0.6                    viridisLite_0.4.0             xtable_1.8-4                  progress_1.2.2                reticulate_1.20              
 [43] mclust_5.4.7                  bit_4.0.4                     httr_1.4.2                    dir.expiry_1.0.0              ellipsis_0.3.2                farver_2.1.0                 
 [49] pkgconfig_2.0.3               XML_3.99-0.6                  dbplyr_2.1.1                  utf8_1.2.1                    labeling_0.4.2                tidyselect_1.1.1             
 [55] rlang_0.4.11                  reshape2_1.4.4                later_1.2.0                   munsell_0.5.0                 BiocVersion_3.13.1            cellranger_1.1.0             
 [61] tools_4.1.0                   cachem_1.0.5                  cli_2.5.0                     generics_0.1.0                RSQLite_2.2.7                 ExperimentHub_2.0.0          
 [67] broom_0.7.7                   fastmap_1.1.0                 yaml_2.2.1                    knitr_1.33                    bit64_4.0.5                   fs_1.5.0                     
 [73] KEGGREST_1.32.0               future_1.21.0                 mime_0.10                     xml2_1.3.2                    biomaRt_2.48.1                compiler_4.1.0               
 [79] rstudioapi_0.13               filelock_1.0.2                curl_4.3.1                    png_0.1-7                     interactiveDisplayBase_1.30.0 reprex_2.0.0                 
 [85] stringi_1.6.2                 basilisk.utils_1.4.0          lattice_0.20-44               ProtGenerics_1.24.0           vctrs_0.3.8                   pillar_1.6.1                 
 [91] lifecycle_1.0.0               rhdf5filters_1.4.0            BiocManager_1.30.16           cowplot_1.1.1                 bitops_1.0-7                  httpuv_1.6.1                 
 [97] rtracklayer_1.52.0            R6_2.5.0                      BiocIO_1.2.0                  promises_1.2.0.1              gridExtra_2.3                 parallelly_1.26.0            
[103] codetools_0.2-18              zellkonverter_1.2.0           MASS_7.3-54                   assertthat_0.2.1              rjson_0.2.20                  withr_2.4.2                  
[109] GenomicAlignments_1.28.0      Rsamtools_2.8.0               GenomeInfoDbData_1.2.6        hms_1.1.0                     grid_4.1.0                    basilisk_1.4.0               
[115] shiny_1.6.0                   lubridate_1.7.10              restfulr_0.0.13              
---
title: "R Notebook"
output: html_notebook
---


```{r}
library(tidyverse)
library(SingleCellExperiment)
# Make sure that you use the forked version
# devtools::install_github("const-ae/sctransform@offset_with_flexible_theta")
library(sctransform)
```

# Prepare Data

```{r}
# Work around for some bug in zellkonverter
# https://github.com/theislab/zellkonverter/issues/45
setAs("dgRMatrix", to = "dgCMatrix", function(from){
  as(as(from, "CsparseMatrix"), "dgCMatrix")
})
```

```{r}
if(! file.exists("../data/nih3t3.h5ad")){
  download.file("https://data.caltech.edu/tindfiles/serve/a448e98e-89cd-47b3-a134-803bbde29781/", "../data/nih3t3.h5ad")
}
if(! file.exists("../data/hek293t.h5ad")){
  download.file("https://data.caltech.edu/tindfiles/serve/b2758046-9247-43ab-b8f0-68882b4f39a3/", "../data/hek293t.h5ad")
}
```



Helper function, so that the content of the sctransform results stay in order
```{r}
transform_sctrans_object <- function(sctrans_obj, Y){
  fit  <- list(overdispersions = rep(NA, nrow(Y)),
                  PearsonResiduals = array(NA, dim(Y)),
                  Mu <- array(NA, dim(Y)),
                  Beta = matrix(NA, nrow = nrow(Y), ncol = 2),
                  Beta_unreg = matrix(NA, nrow = nrow(Y), ncol = 2),
                  overdispersions_unreg = rep(NA, nrow(Y)),
                  gmean = rep(NA, nrow(Y)))
  sf <- colSums2(Y)
  
  indices <- match(rownames(sctrans_obj$model_pars_fit), rownames(Y))
  indices_unreg <- match(rownames(sctrans_obj$model_pars), rownames(Y))
  fit$overdispersions[indices] <- 1/sctrans_obj$model_pars_fit[,"theta"]
  fit$PearsonResiduals[indices, ] <- sctrans_obj$y
  fit$Beta[indices, ] <- sctrans_obj$model_pars_fit[, c("(Intercept)", "log_umi")]
  fit$Beta_unreg[indices_unreg, ] <- sctrans_obj$model_pars[, c("(Intercept)", "log_umi")]
  fit$Mu <- exp(fit$Beta %*% t(cbind(1, log10(sf))))
  fit$Mu_unreg <- exp(fit$Beta_unreg %*% t(cbind(1, log10(sf))))
  fit$overdispersions_unreg[indices_unreg] <- 1 / sctrans_obj$model_pars[, "theta"]
  fit$gmean[indices] <- sctrans_obj$gene_attr$gmean
  
  fit
}
```


Load Data

```{r}
load_data_fnc <- list(
  NIH3T3 = function() zellkonverter::readH5AD("../data/nih3t3.h5ad"),
  HEK239T = function() zellkonverter::readH5AD("../data/hek293t.h5ad"),
  PBMC4k = function() TENxPBMCData::TENxPBMCData("pbmc4k"),
  Mesmer = function() scRNAseq::MessmerESCData(), 
  Zeisel = function() scRNAseq::ZeiselBrainData(),
  Zhen = function() DuoClustering2018::sce_filteredExpr10_Zhengmix4eq()
)
```

Run sctransform 3 times on each dataset

- Standard Poisson model
- Offset with flexible theta estimation
- Offset with fixed theta

```{r, warning = FALSE}

full_data <- bind_rows(lapply(names(load_data_fnc), function(name){
  print(paste0("Loading ", name))
  
  se <- load_data_fnc[[name]]()
  
  rownames(se) <- paste0("Gene_", 1:nrow(se))
  colnames(se) <- paste0("Cell_", 1:ncol(se))
  # Subsample data to 3000x1000 elements
  se_red <- se[sample(seq_len(nrow(se)), min(3000, nrow(se))), sample(seq_len(ncol(se)), min(1000, ncol(se)))]
  
  Y <- as.matrix(assay(se_red))
  
  set.seed(1)
  sctrans_intermediate <- vst(Y, method = "poisson")
  fit_pois <- transform_sctrans_object(sctrans_intermediate, Y)
  # To make sure that I am actually running the correct sctransform version
  # I check if the indicator value is set
  stopifnot(isTRUE(sctrans_intermediate$is_fork))
  
  set.seed(1)
  sctrans_intermediate2 <- vst(Y, method = "offset", theta_given = NULL)
  fit_off <- transform_sctrans_object(sctrans_intermediate2, Y)
  
  set.seed(1)
  sctrans_intermediate3 <- vst(Y, method = "offset", theta_given = 100)
  fit_off_fixed <- transform_sctrans_object(sctrans_intermediate3, Y)
  
  theta_given <- rep(100, nrow(Y))
  names(theta_given) <- rownames(Y)
  set.seed(1)
  sctrans_intermediate4 <- vst(Y, method = "nb_theta_given", theta_given = theta_given)
  fit_pois_fixed <- transform_sctrans_object(sctrans_intermediate4, Y)

  
  tibble(Dataset = name,
         Poisson = c(fit_pois$PearsonResiduals),
         PoissonFixed = c(fit_pois_fixed$PearsonResiduals),
         Offset = c(fit_off$PearsonResiduals),
         OffsetFixed = c(fit_off_fixed$PearsonResiduals),
         ExpressionLevel = c(fit_off$Mu),
         y = c(Y))
}))


```






```{r, fig.width=15, fig.height=3}
p1 <- full_data %>%
  drop_na() %>%
  # sample_n(5000) %>%
  ggplot(aes(x= Poisson, y = Offset - Poisson)) +
    geom_hline(yintercept = 0, color = "#E9E9E9") +
    scattermore::geom_scattermore(aes(color = y), pointsize = 2.5, pixels = c(512, 512), alpha = 0.1) +
    scale_color_viridis_c(direction = 1, option = "magma", begin = 0.2, end = 0.8, 
                          trans = "log1p", breaks = c(0, 10, 100, 1000),
                          labels = c(0, 10, 100, ">1000"), limits = c(0, 1001), 
                          oob = scales::oob_squish_any, 
                          minor_breaks = c(0:9, seq(10, 90, by = 10), seq(100, 1000, by = 100))) +
    scale_y_continuous(limits = c(-30, 30)) +
    facet_wrap(~ Dataset, nrow = 1) +
    cowplot::theme_cowplot() +
    labs(title = expression(Effect~of~fixing~beta[s]==1),
         color = "Y", 
         x = "Residuals from the default sctransform model",
         y = expression(Delta[1]))


p1
```

```{r, fig.width=15, fig.height=3}
p2 <- full_data %>%
  drop_na() %>%
  # sample_n(5000) %>%
  ggplot(aes(x= Poisson, y = PoissonFixed - Poisson)) +
    geom_hline(yintercept = 0, color = "#E9E9E9") +
    scattermore::geom_scattermore(aes(color = y), pointsize = 2.5, pixels = c(512, 512), alpha = 0.1) +
    scale_color_viridis_c(direction = 1, option = "magma", begin = 0.2, end = 0.8, 
                          trans = "log1p", breaks = c(0, 10, 100, 1000),
                          labels = c(0, 10, 100, ">1000"), limits = c(0, 1001), 
                          oob = scales::oob_squish_any, 
                          minor_breaks = c(0:9, seq(10, 90, by = 10), seq(100, 1000, by = 100))) +
    scale_y_continuous(limits = c(-30, 30)) +
    facet_wrap(~ Dataset, nrow = 1) +
    cowplot::theme_cowplot() +
    labs(title = expression(Effect~of~fixing~alpha==0.01),
         color = "Y", 
         x = "Residuals from the default sctransform model",
         y = expression(Delta[2]))

p2
```



```{r, fig.width=15, fig.height=3}
p3 <- full_data %>%
  drop_na() %>%
  # sample_n(5000) %>%
  ggplot(aes(x= Poisson, y = OffsetFixed - Poisson)) +
    geom_hline(yintercept = 0, color = "#E9E9E9") +
    scattermore::geom_scattermore(aes(color = y), pointsize = 2.5, pixels = c(512, 512), alpha = 0.1) +
    scale_color_viridis_c(direction = 1, option = "magma", begin = 0.2, end = 0.8, 
                          trans = "log1p", breaks = c(0, 10, 100, 1000),
                          labels = c(0, 10, 100, ">1000"), limits = c(0, 1001), 
                          oob = scales::oob_squish_any, 
                          minor_breaks = c(0:9, seq(10, 90, by = 10), seq(100, 1000, by = 100))) +
    scale_y_continuous(limits = c(-30, 30)) +
    facet_wrap(~ Dataset, nrow = 1) +
    cowplot::theme_cowplot() +
    labs(title = expression(Effect~of~fixing~beta[s]==1~and~alpha==0.01),
         color = "Y", 
         x = "Residuals from the default sctransform model",
         y = expression(Delta[3]))

p3
```



```{r, fig.width=20, fig.height=10}
library(patchwork)
res <- (p1 / p2 / p3) +
  plot_annotation(tag_levels = "A") + plot_layout(guides = "collect")

res
```


```{r}
cowplot::save_plot("../output/sctransform_beta_s_and_alpha_importance.pdf", res, 
                   ncol = 7, nrow = 3, base_asp = 0.6, base_height = 3, device = cairo_pdf)
```





# Session Info

```{r}
sessionInfo()
```


 